### Measuring Political Attitudes with Word Association - Frequency Analysis ###
## Ze Han, Ph.D. Student, Princeton University Department of Politics, zeh@princeton.edu ##
## Naijia Liu, Assistant Professor, Harvard University Department of Government, naijialiu@fas.harvard.edu ##
## Rory Truex, Associate Professor, Princeton University Department of Politics and School of Public and International Affairs, rtruex@princeton.edu ##

# Set the working directory to the replication folder; every relative path
# used below (./data, ./tables, ./figures) is resolved against it.
# NOTE(review): this path is machine-specific -- adjust before running elsewhere.
setwd("~/Desktop/WAT_PoQ/code")

# Start from a clean workspace. `all.names = TRUE` is spelled out in full
# (rather than the partially matched `all = TRUE`) so hidden objects are
# removed without relying on partial argument matching.
rm(list = ls(all.names = TRUE))

## Load Packages ##

library(tidyverse)
library(sandwich)
library(lmtest)
library(jtools)
library(huxtable)
library(broom.mixed)
library("jiebaR")
library("jiebaRD")
library("stargazer")
library("quanteda")
library("lfe")

## Load Datasets##
# Word-association responses: mainland China sample, then Hong Kong sample
data.wat.filt <- read.csv(
  file = "./data/data.wat.filt.csv",
  stringsAsFactors = FALSE
)
data.hk.wat.filt <- read.csv(
  file = "./data/data.hk.wat.filt.csv",
  stringsAsFactors = FALSE
)

# Lookup keys that are joined onto the frequency tables by their `words` column
fw.full.key <- read.csv(
  file = "./data/fw.full.key.zh.csv",
  stringsAsFactors = FALSE
)
fw.full.key.hk <- read.csv(
  file = "./data/fw.full.key.hk.csv",
  stringsAsFactors = FALSE
)

# Dictionaries whose `cooc` column is matched against the democracy responses
dicwp05 <- read.csv(
  file = "./data/dicwp05.csv",
  stringsAsFactors = FALSE
)
dicwp <- read.csv(
  file = "./data/dicwp.csv",
  stringsAsFactors = FALSE
)

## Study 1 - Mainland China ##

# Frequency Analysis
# Remove the first run of digits from every response (sub() replaces only the
# first match), then split the mainland sample by English cue word.
data.wat.filt[["response"]] <- sub("[[:digit:]]+", "", data.wat.filt[["response"]])

data.wat.cg <- data.wat.filt %>%
  filter(term.eng == "central government")
data.wat.ccp <- data.wat.filt %>%
  filter(term.eng == "CCP")
data.wat.democracy <- data.wat.filt %>%
  filter(term.eng == "democracy")
data.wat.china <- data.wat.filt %>%
  filter(term.eng == "China")

# Build the jiebaR segmentation engine ("cutter") that is passed to every
# segment() call below.
#   user      - the Sogou standard word list, loaded as the user dictionary
#   stop_word - the Chinese stop-word list
#   symbol    - FALSE (symbol option switched off)
#   bylines   - FALSE (by-line option switched off)
# TRUE/FALSE are written out in full because T and F are ordinary variables
# that can be reassigned.
cutter <- worker(user = "./data/Sogou.txt",
                 stop_word = "./data/stop_words.utf8",
                 symbol = FALSE,
                 bylines = FALSE)

# Response frequency tables for the mainland China sample, one per cue word.
# Each pipeline segments all responses for the cue into words, drops tokens
# that contain the cue word itself, counts the remaining tokens (most frequent
# first), divides each count by the number of responses for that cue, and
# attaches the key columns by `words`.

# Central government
fw.cg <- data.wat.cg$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "中央政府|中央|政府", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.wat.cg$ResponseID)) %>%
  left_join(fw.full.key, by = "words")

# Democracy
fw.dem <- data.wat.democracy$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "民主", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.wat.democracy$ResponseID)) %>%
  left_join(fw.full.key, by = "words")

# China
fw.china <- data.wat.china$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "中国", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.wat.china$ResponseID)) %>%
  left_join(fw.full.key, by = "words")

# CCP
fw.ccp <- data.wat.ccp$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "中共", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.wat.ccp$ResponseID)) %>%
  left_join(fw.full.key, by = "words")

# Table 2: Most Common Responses for Regime Cue Words (Mainland China) - Filtered
# For each cue: keep responses whose proportion exceeds 0.01, round the
# proportion to three decimals, sort by descending frequency, and keep only the
# English label, frequency and proportion under their table headings.
fw.cg.filt <- fw.cg %>%
  filter(prop > 0.01) %>%
  mutate(prop = round(prop, 3)) %>%
  arrange(desc(freq)) %>%
  select(Response = term.eng, Freq = freq, `p(r|c)` = prop)

fw.ccp.filt <- fw.ccp %>%
  filter(prop > 0.01) %>%
  mutate(prop = round(prop, 3)) %>%
  arrange(desc(freq)) %>%
  select(Response = term.eng, Freq = freq, `p(r|c)` = prop)

fw.china.filt <- fw.china %>%
  filter(prop > 0.01) %>%
  mutate(prop = round(prop, 3)) %>%
  arrange(desc(freq)) %>%
  select(Response = term.eng, Freq = freq, `p(r|c)` = prop)

# Write the three filtered Table 2 panels to ./tables without a row-name
# column. FALSE is spelled out because F is a reassignable variable.
write.csv(fw.cg.filt, "./tables/fw.cg.filt.csv", row.names = FALSE)
write.csv(fw.ccp.filt, "./tables/fw.ccp.filt.csv", row.names = FALSE)
write.csv(fw.china.filt, "./tables/fw.china.filt.csv", row.names = FALSE)

# Dictionary Analysis
# Adds three columns to the democracy-cue responses:
#   response2 - the response segmented by `cutter`, tokens joined by spaces
#   wp05      - TRUE when any entry of dicwp05$cooc matches response2
#   wp        - TRUE when any entry of dicwp$cooc matches response2
# Dictionary entries are used as grepl() patterns, i.e. as regular expressions.
# vapply(..., logical(1)) replaces sapply() so the per-entry result is always a
# logical vector, including when a dictionary has no entries.
data.wat.democracy <- data.wat.democracy %>%
  mutate(
    response2 = map_chr(response, ~ paste(segment(.x, cutter), collapse = " ")),
    wp05 = map_lgl(response2, ~ any(vapply(dicwp05$cooc, grepl, logical(1), .x))),
    wp = map_lgl(response2, ~ any(vapply(dicwp$cooc, grepl, logical(1), .x)))
  )

# Probit models of the two dictionary flags (wp05, wp) on respondent
# covariates, each estimated without and with `sat.central`. Every model is
# followed by its coefficient test using an HC1 covariance matrix and by the
# fourth column of that test (the p-values), which the regression table uses.

# Model 1: wp05, baseline covariates
d1.1 <- glm(
  wp05 ~ female + age + lowed + ccp + hukou.agr,
  data = data.wat.democracy,
  family = binomial(link = "probit")
)
r1.1 <- coeftest(d1.1, vcov = vcovHC(d1.1, type = "HC1"))
p1.1 <- r1.1[, 4]

# Model 2: wp05, baseline covariates plus sat.central
d1.2 <- glm(
  wp05 ~ female + age + lowed + ccp + hukou.agr + sat.central,
  data = data.wat.democracy,
  family = binomial(link = "probit")
)
r1.2 <- coeftest(d1.2, vcov = vcovHC(d1.2, type = "HC1"))
p1.2 <- r1.2[, 4]

# Model 3: wp, baseline covariates
d1.3 <- glm(
  wp ~ female + age + lowed + ccp + hukou.agr,
  data = data.wat.democracy,
  family = binomial(link = "probit")
)
r1.3 <- coeftest(d1.3, vcov = vcovHC(d1.3, type = "HC1"))
p1.3 <- r1.3[, 4]

# Model 4: wp, baseline covariates plus sat.central
d1.4 <- glm(
  wp ~ female + age + lowed + ccp + hukou.agr + sat.central,
  data = data.wat.democracy,
  family = binomial(link = "probit")
)
r1.4 <- coeftest(d1.4, vcov = vcovHC(d1.4, type = "HC1"))
p1.4 <- r1.4[, 4]

# Table SI15: Determinants of CCP Narrative Responses (Mainland China) - Filtered
# Regression table for the four probit models. The p-values shown (and the
# significance stars derived from them) are the HC1-based ones supplied via
# `p`; `report = "vc*p"` prints variable names, starred coefficients and
# p-values.
stargazer(
  d1.1, d1.2, d1.3, d1.4,
  p = list(p1.1, p1.2, p1.3, p1.4),
  report = "vc*p",
  omit.stat = c("LL", "ser", "f"),
  dep.var.labels.include = FALSE,
  covariate.labels = c(
    "Female",
    "Age",
    "Low Education",
    "CCP Membership",
    "Agricultural Hukou",
    "Satisfaction with the Central Government"
  )
)

# Figure 8: Determinants of CCP Narrative Responses (Mainland China) - Filtered
# Coefficient plot of the four probit models with HC1 robust standard errors.
# The robust type is passed through `robust = "HC1"`: jtools takes the type
# via `robust` itself, and a bare `robust = TRUE` selects its default type.
# The previous `robust_type = "HC1"` is not a jtools argument, so the plot did
# not necessarily use the same HC1 errors as the regression table.
# NOTE(review): confirm against the installed jtools version before
# regenerating the published figure.
p1 <- plot_summs(
  d1.1, d1.2, d1.3, d1.4,
  robust = "HC1",
  model.names = c("Model 1", "Model 2", "Model 3", "Model 4"),
  # inner_ci_level = .9,
  coefs = c(
    "Female" = "female",
    "Age" = "age",
    "Low Education" = "lowed",
    "CCP Membership" = "ccp",
    "Agricultural Hukou" = "hukou.agr",
    "Satisfaction with Central Government" = "sat.central"
  )
) +
  theme_bw() +
  theme(
    legend.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 11),
    legend.text = element_text(size = 11),
    legend.position = "right"
  ) +
  labs(y = NULL) +
  guides(color = guide_legend(nrow = 4))

# Save the plot object explicitly as an 8 x 6 PNG.
ggsave("./figures/fig-coef-dict.png", p1, width = 8, height = 6)

## Study 2 - Hong Kong ##

# Frequency Analysis
# Remove the first run of digits from every response (sub() replaces only the
# first match), then split the Hong Kong sample by English cue word.
data.hk.wat.filt[["response"]] <- sub("[[:digit:]]+", "", data.hk.wat.filt[["response"]])

data.hk.wat.cg <- data.hk.wat.filt %>%
  filter(term.eng == "central government")
data.hk.wat.ccp <- data.hk.wat.filt %>%
  filter(term.eng == "CCP")
data.hk.wat.democracy <- data.hk.wat.filt %>%
  filter(term.eng == "democracy")
data.hk.wat.china <- data.hk.wat.filt %>%
  filter(term.eng == "China")

# Response frequency tables for the Hong Kong sample, one per cue word.
# Each pipeline segments all responses for the cue into words, drops tokens
# that contain the cue word itself, counts the remaining tokens (most frequent
# first), divides each count by the number of responses for that cue, and
# attaches the Hong Kong key columns by `words`.

# Central government
fw.hk.cg <- data.hk.wat.cg$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "中央政府|中央|政府", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.hk.wat.cg$ResponseID)) %>%
  left_join(fw.full.key.hk, by = "words")

# Democracy
fw.hk.dem <- data.hk.wat.democracy$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "民主", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.hk.wat.democracy$ResponseID)) %>%
  left_join(fw.full.key.hk, by = "words")

# China
fw.hk.china <- data.hk.wat.china$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "中国", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.hk.wat.china$ResponseID)) %>%
  left_join(fw.full.key.hk, by = "words")

# CCP
fw.hk.ccp <- data.hk.wat.ccp$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "中共", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.hk.wat.ccp$ResponseID)) %>%
  left_join(fw.full.key.hk, by = "words")

# Table SI6: Most Common Responses for Regime Cue Words (Hong Kong) - Filtered
# For each cue: keep responses whose proportion exceeds 0.01, round the
# proportion to three decimals, sort by descending frequency, and keep only the
# English label, frequency and proportion under their table headings.
fw.hk.cg.filt <- fw.hk.cg %>%
  filter(prop > 0.01) %>%
  mutate(prop = round(prop, 3)) %>%
  arrange(desc(freq)) %>%
  select(Response = term.eng, Freq = freq, `p(r|c)` = prop)

fw.hk.ccp.filt <- fw.hk.ccp %>%
  filter(prop > 0.01) %>%
  mutate(prop = round(prop, 3)) %>%
  arrange(desc(freq)) %>%
  select(Response = term.eng, Freq = freq, `p(r|c)` = prop)

fw.hk.china.filt <- fw.hk.china %>%
  filter(prop > 0.01) %>%
  mutate(prop = round(prop, 3)) %>%
  arrange(desc(freq)) %>%
  select(Response = term.eng, Freq = freq, `p(r|c)` = prop)

# Write the three filtered Table SI6 panels to ./tables without a row-name
# column. FALSE is spelled out because F is a reassignable variable.
write.csv(fw.hk.cg.filt, "./tables/fw.hk.cg.filt.csv", row.names = FALSE)
write.csv(fw.hk.ccp.filt, "./tables/fw.hk.ccp.filt.csv", row.names = FALSE)
write.csv(fw.hk.china.filt, "./tables/fw.hk.china.filt.csv", row.names = FALSE)

## Hong Kong - Mainland China Samples Comparison ##

# Figure 6: Mainland China-Hong Kong Response Comparison for Democracy Cue
# Keep one row per word (the first) in each table so that every word appears at
# most once on each side of the join.
fw.hk.dem <- fw.hk.dem %>%
  distinct(words, .keep_all = TRUE)

fw.dem <- fw.dem %>%
  distinct(words, .keep_all = TRUE)

# Full join on `words`: `.x` columns come from the mainland table, `.y` from
# the Hong Kong table. A word missing on one side counts as frequency 0 and
# proportion 0 there; the English label is taken from the mainland key first.
# Only words with a combined frequency of at least 10 are kept.
comp.dem <- fw.dem %>%
  full_join(fw.hk.dem, by = "words") %>%
  mutate(
    freq.tot = coalesce(freq.x, 0) + coalesce(freq.y, 0),
    prop.diff = coalesce(prop.x, 0) - coalesce(prop.y, 0),
    term.eng = coalesce(term.eng.x, term.eng.y)
  ) %>%
  filter(freq.tot > 9)

pdf("./figures/fig-samplecomp-dem.pdf", width = 8, height = 7)
# print() is called explicitly: a ggplot object is drawn only when printed, and
# top-level auto-printing does not happen when this file is run via source(),
# which would leave the PDF without a page.
print(
  ggplot(comp.dem,
         aes(x = prop.diff, y = reorder(term.eng, prop.diff), size = freq.tot)) +
    xlab("Probability Difference (Mainland China - Hong Kong)") +
    ylab("Response") +
    xlim(c(-.12, .12)) +
    scale_shape(solid = TRUE) +
    theme_classic() +
    geom_point(alpha = .7, col = "grey75") +
    ggtitle("Cue Word: Democracy") +
    geom_vline(xintercept = 0, linetype = "dashed", size = .3) +
    scale_size_continuous(breaks = c(10, 20, 50, 100, 200, 500, 750),
                          range = c(1, 10), name = "Total Frequency") +
    theme(plot.title = element_text(size = 11))
)
dev.off()

# Figure 7: Mainland China-Hong Kong Response Comparison for CCP Cue
# Keep one row per word (the first) in each table so that every word appears at
# most once on each side of the join.
fw.hk.ccp <- fw.hk.ccp %>%
  distinct(words, .keep_all = TRUE)

fw.ccp <- fw.ccp %>%
  distinct(words, .keep_all = TRUE)

# Full join on `words`: `.x` columns come from the mainland table, `.y` from
# the Hong Kong table. A word missing on one side counts as frequency 0 and
# proportion 0 there; the English label is taken from the mainland key first.
# Only words with a combined frequency of at least 10 are kept.
comp.ccp <- fw.ccp %>%
  full_join(fw.hk.ccp, by = "words") %>%
  mutate(
    freq.tot = coalesce(freq.x, 0) + coalesce(freq.y, 0),
    prop.diff = coalesce(prop.x, 0) - coalesce(prop.y, 0),
    term.eng = coalesce(term.eng.x, term.eng.y)
  ) %>%
  filter(freq.tot > 9)

pdf("./figures/fig-samplecomp-ccp.pdf", width = 8, height = 7)
# print() is called explicitly: a ggplot object is drawn only when printed, and
# top-level auto-printing does not happen when this file is run via source(),
# which would leave the PDF without a page.
print(
  ggplot(comp.ccp,
         aes(x = prop.diff, y = reorder(term.eng, prop.diff), size = freq.tot)) +
    xlab("Probability Difference (Mainland China - Hong Kong)") +
    ylab("Response") +
    xlim(c(-.16, .12)) +
    scale_shape(solid = TRUE) +
    theme_classic() +
    geom_point(alpha = .7, col = "grey75") +
    ggtitle("Cue Word: CCP") +
    geom_vline(xintercept = 0, linetype = "dashed", size = .3) +
    scale_size_continuous(breaks = c(10, 20, 50, 100, 200, 500),
                          range = c(1, 10), name = "Total Frequency") +
    theme(plot.title = element_text(size = 11))
)
dev.off()

# Figure SI9: Mainland China-Hong Kong Response Comparison for China Cue
# Keep one row per word (the first) in each table so that every word appears at
# most once on each side of the join.
fw.hk.china <- fw.hk.china %>%
  distinct(words, .keep_all = TRUE)

fw.china <- fw.china %>%
  distinct(words, .keep_all = TRUE)

# Full join on `words`: `.x` columns come from the mainland table, `.y` from
# the Hong Kong table. A word missing on one side counts as frequency 0 and
# proportion 0 there; the English label is taken from the mainland key first.
# Only words with a combined frequency of at least 10 are kept.
comp.china <- fw.china %>%
  full_join(fw.hk.china, by = "words") %>%
  mutate(
    freq.tot = coalesce(freq.x, 0) + coalesce(freq.y, 0),
    prop.diff = coalesce(prop.x, 0) - coalesce(prop.y, 0),
    term.eng = coalesce(term.eng.x, term.eng.y)
  ) %>%
  filter(freq.tot > 9)

pdf("./figures/fig-samplecomp-china.pdf", width = 8, height = 7)
# print() is called explicitly: a ggplot object is drawn only when printed, and
# top-level auto-printing does not happen when this file is run via source(),
# which would leave the PDF without a page.
print(
  ggplot(comp.china,
         aes(x = prop.diff, y = reorder(term.eng, prop.diff), size = freq.tot)) +
    xlab("Probability Difference (Mainland China - Hong Kong)") +
    ylab("Response") +
    xlim(c(-.15, .15)) +
    scale_shape(solid = TRUE) +
    theme_classic() +
    geom_point(alpha = .7, col = "grey75") +
    ggtitle("Cue Word: China") +
    geom_vline(xintercept = 0, linetype = "dashed", size = .3) +
    scale_size_continuous(breaks = c(10, 20, 50, 100, 200, 500),
                          range = c(1, 10), name = "Total Frequency") +
    theme(plot.title = element_text(size = 11))
)
dev.off()

# Figure SI10: Mainland China-Hong Kong Response Comparison for Central Government Cue
# Keep one row per word (the first) in each table so that every word appears at
# most once on each side of the join.
fw.hk.cg <- fw.hk.cg %>%
  distinct(words, .keep_all = TRUE)

fw.cg <- fw.cg %>%
  distinct(words, .keep_all = TRUE)

# Full join on `words`: `.x` columns come from the mainland table, `.y` from
# the Hong Kong table. A word missing on one side counts as frequency 0 and
# proportion 0 there; the English label is taken from the mainland key first.
# Only words with a combined frequency of at least 10 are kept.
comp.cg <- fw.cg %>%
  full_join(fw.hk.cg, by = "words") %>%
  mutate(
    freq.tot = coalesce(freq.x, 0) + coalesce(freq.y, 0),
    prop.diff = coalesce(prop.x, 0) - coalesce(prop.y, 0),
    term.eng = coalesce(term.eng.x, term.eng.y)
  ) %>%
  filter(freq.tot > 9)

pdf("./figures/fig-samplecomp-cg.pdf", width = 8, height = 7)
# print() is called explicitly: a ggplot object is drawn only when printed, and
# top-level auto-printing does not happen when this file is run via source(),
# which would leave the PDF without a page.
print(
  ggplot(comp.cg,
         aes(x = prop.diff, y = reorder(term.eng, prop.diff), size = freq.tot)) +
    xlab("Probability Difference (Mainland China - Hong Kong)") +
    ylab("Response") +
    xlim(c(-.13, .1)) +
    scale_shape(solid = TRUE) +
    theme_classic() +
    geom_point(alpha = .7, col = "grey75") +
    ggtitle("Cue Word: Central Government") +
    geom_vline(xintercept = 0, linetype = "dashed", size = .3) +
    scale_size_continuous(breaks = c(10, 20, 50, 100, 200, 500),
                          range = c(1, 10), name = "Total Frequency") +
    theme(plot.title = element_text(size = 11))
)
dev.off()

## CCP - Non CCP Samples Comparison ##

# CCP Sample
# Rows of each mainland cue subset where `ccp` equals 1.
data.ccp.wat.cg <- data.wat.cg %>%
  filter(ccp == 1)
data.ccp.wat.ccp <- data.wat.ccp %>%
  filter(ccp == 1)
data.ccp.wat.democracy <- data.wat.democracy %>%
  filter(ccp == 1)
data.ccp.wat.china <- data.wat.china %>%
  filter(ccp == 1)

# Response frequency tables for the CCP-member subsample, one per cue word.
# Each pipeline segments all responses for the cue into words, drops tokens
# that contain the cue word itself, counts the remaining tokens (most frequent
# first), divides each count by the number of subsample responses for that cue,
# and attaches the key columns by `words`.

# Central government
fw.ccp.cg <- data.ccp.wat.cg$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "中央政府|中央|政府", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.ccp.wat.cg$ResponseID)) %>%
  left_join(fw.full.key, by = "words")

# Democracy
fw.ccp.dem <- data.ccp.wat.democracy$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "民主", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.ccp.wat.democracy$ResponseID)) %>%
  left_join(fw.full.key, by = "words")

# China
fw.ccp.china <- data.ccp.wat.china$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "中国", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.ccp.wat.china$ResponseID)) %>%
  left_join(fw.full.key, by = "words")

# CCP
fw.ccp.ccp <- data.ccp.wat.ccp$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "中共", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.ccp.wat.ccp$ResponseID)) %>%
  left_join(fw.full.key, by = "words")

# Non-CCP Sample
# Rows of each mainland cue subset where `ccp` equals 0.
data.nccp.wat.cg <- data.wat.cg %>%
  filter(ccp == 0)
data.nccp.wat.ccp <- data.wat.ccp %>%
  filter(ccp == 0)
data.nccp.wat.democracy <- data.wat.democracy %>%
  filter(ccp == 0)
data.nccp.wat.china <- data.wat.china %>%
  filter(ccp == 0)

# Response frequency tables for the non-member subsample, one per cue word.
# Each pipeline segments all responses for the cue into words, drops tokens
# that contain the cue word itself, counts the remaining tokens (most frequent
# first), divides each count by the number of subsample responses for that cue,
# and attaches the key columns by `words`.

# Central government
fw.nccp.cg <- data.nccp.wat.cg$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "中央政府|中央|政府", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.nccp.wat.cg$ResponseID)) %>%
  left_join(fw.full.key, by = "words")

# Democracy
fw.nccp.dem <- data.nccp.wat.democracy$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "民主", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.nccp.wat.democracy$ResponseID)) %>%
  left_join(fw.full.key, by = "words")

# China
fw.nccp.china <- data.nccp.wat.china$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "中国", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.nccp.wat.china$ResponseID)) %>%
  left_join(fw.full.key, by = "words")

# CCP
fw.nccp.ccp <- data.nccp.wat.ccp$response %>%
  segment(cutter) %>%
  as_tibble() %>%
  filter(str_detect(value, "中共", negate = TRUE)) %>%
  count(value, name = "freq", sort = TRUE) %>%
  rename(words = value) %>%
  mutate(prop = freq / length(data.nccp.wat.ccp$ResponseID)) %>%
  left_join(fw.full.key, by = "words")

# Figure SI11: CCP and Non-CCP Members Response Comparison for China Cue
# Keep one row per word (the first) in each table so that every word appears at
# most once on each side of the join.
fw.nccp.china <- fw.nccp.china %>%
  distinct(words, .keep_all = TRUE)

fw.ccp.china <- fw.ccp.china %>%
  distinct(words, .keep_all = TRUE)

# Full join on `words`: `.x` columns come from the CCP-member table, `.y` from
# the non-member table. A word missing on one side counts as frequency 0 and
# proportion 0 there; the English label is taken from the member side first.
# Only words with a combined frequency of at least 10 are kept.
subcomp.china <- fw.ccp.china %>%
  full_join(fw.nccp.china, by = "words") %>%
  mutate(
    freq.tot = coalesce(freq.x, 0) + coalesce(freq.y, 0),
    prop.diff = coalesce(prop.x, 0) - coalesce(prop.y, 0),
    term.eng = coalesce(term.eng.x, term.eng.y)
  ) %>%
  filter(freq.tot > 9)

pdf("./figures/fig-subcomp-china.pdf", width = 8, height = 7)
# print() is called explicitly: a ggplot object is drawn only when printed, and
# top-level auto-printing does not happen when this file is run via source(),
# which would leave the PDF without a page.
print(
  ggplot(subcomp.china,
         aes(x = prop.diff, y = reorder(term.eng, prop.diff), size = freq.tot)) +
    xlab("Probability Difference (CCP Members - Non-Members)") +
    ylab("Response") +
    xlim(c(-.02, .03)) +
    scale_shape(solid = TRUE) +
    theme_classic() +
    geom_point(alpha = .7, col = "grey75") +
    ggtitle("Cue Word: China") +
    geom_vline(xintercept = 0, linetype = "dashed", size = .3) +
    scale_size_continuous(breaks = c(10, 20, 50, 100, 200),
                          range = c(1, 10), name = "Total Frequency") +
    theme(plot.title = element_text(size = 11))
)
dev.off()

# Figure SI12: CCP and Non-CCP Members Response Comparison for CCP Cue
# Keep one row per word (the first) in each table so that every word appears at
# most once on each side of the join.
fw.nccp.ccp <- fw.nccp.ccp %>%
  distinct(words, .keep_all = TRUE)

fw.ccp.ccp <- fw.ccp.ccp %>%
  distinct(words, .keep_all = TRUE)

# Full join on `words`: `.x` columns come from the CCP-member table, `.y` from
# the non-member table. A word missing on one side counts as frequency 0 and
# proportion 0 there; the English label is taken from the member side first.
# Only words with a combined frequency of at least 10 are kept.
subcomp.ccp <- fw.ccp.ccp %>%
  full_join(fw.nccp.ccp, by = "words") %>%
  mutate(
    freq.tot = coalesce(freq.x, 0) + coalesce(freq.y, 0),
    prop.diff = coalesce(prop.x, 0) - coalesce(prop.y, 0),
    term.eng = coalesce(term.eng.x, term.eng.y)
  ) %>%
  filter(freq.tot > 9)

pdf("./figures/fig-subcomp-ccp.pdf", width = 8, height = 7)
# print() is called explicitly: a ggplot object is drawn only when printed, and
# top-level auto-printing does not happen when this file is run via source(),
# which would leave the PDF without a page.
print(
  ggplot(subcomp.ccp,
         aes(x = prop.diff, y = reorder(term.eng, prop.diff), size = freq.tot)) +
    xlab("Probability Difference (CCP Members - Non-Members)") +
    ylab("Response") +
    xlim(c(-.02, .06)) +
    scale_shape(solid = TRUE) +
    theme_classic() +
    geom_point(alpha = .7, col = "grey75") +
    ggtitle("Cue Word: CCP") +
    geom_vline(xintercept = 0, linetype = "dashed", size = .3) +
    scale_size_continuous(breaks = c(10, 20, 50, 100),
                          range = c(1, 10), name = "Total Frequency") +
    theme(plot.title = element_text(size = 11))
)
dev.off()

# Figure SI13: CCP and Non-CCP Members Response Comparison for Central Government Cue
# Keep one row per word (the first) in each table so that every word appears at
# most once on each side of the join.
fw.nccp.cg <- fw.nccp.cg %>%
  distinct(words, .keep_all = TRUE)

fw.ccp.cg <- fw.ccp.cg %>%
  distinct(words, .keep_all = TRUE)

# Full join on `words`: `.x` columns come from the CCP-member table, `.y` from
# the non-member table. A word missing on one side counts as frequency 0 and
# proportion 0 there; the English label is taken from the member side first.
# Only words with a combined frequency of at least 10 are kept.
subcomp.cg <- fw.ccp.cg %>%
  full_join(fw.nccp.cg, by = "words") %>%
  mutate(
    freq.tot = coalesce(freq.x, 0) + coalesce(freq.y, 0),
    prop.diff = coalesce(prop.x, 0) - coalesce(prop.y, 0),
    term.eng = coalesce(term.eng.x, term.eng.y)
  ) %>%
  filter(freq.tot > 9)

pdf("./figures/fig-subcomp-cg.pdf", width = 8, height = 7)
# print() is called explicitly: a ggplot object is drawn only when printed, and
# top-level auto-printing does not happen when this file is run via source(),
# which would leave the PDF without a page.
print(
  ggplot(subcomp.cg,
         aes(x = prop.diff, y = reorder(term.eng, prop.diff), size = freq.tot)) +
    xlab("Probability Difference (CCP Members - Non-Members)") +
    ylab("Response") +
    xlim(c(-.03, .03)) +
    scale_shape(solid = TRUE) +
    theme_classic() +
    geom_point(alpha = .7, col = "grey75") +
    ggtitle("Cue Word: Central Government") +
    geom_vline(xintercept = 0, linetype = "dashed", size = .3) +
    scale_size_continuous(breaks = c(10, 20, 50, 100),
                          range = c(1, 10), name = "Total Frequency") +
    theme(plot.title = element_text(size = 11))
)
dev.off()

# Figure SI14: CCP and Non-CCP Members Response Comparison for Democracy Cue
# Keep one row per word (the first) in each table so that every word appears at
# most once on each side of the join.
fw.nccp.dem <- fw.nccp.dem %>%
  distinct(words, .keep_all = TRUE)

fw.ccp.dem <- fw.ccp.dem %>%
  distinct(words, .keep_all = TRUE)

# Full join on `words`: `.x` columns come from the CCP-member table, `.y` from
# the non-member table. A word missing on one side counts as frequency 0 and
# proportion 0 there; the English label is taken from the member side first.
# Only words with a combined frequency of at least 10 are kept.
subcomp.dem <- fw.ccp.dem %>%
  full_join(fw.nccp.dem, by = "words") %>%
  mutate(
    freq.tot = coalesce(freq.x, 0) + coalesce(freq.y, 0),
    prop.diff = coalesce(prop.x, 0) - coalesce(prop.y, 0),
    term.eng = coalesce(term.eng.x, term.eng.y)
  ) %>%
  filter(freq.tot > 9)

pdf("./figures/fig-subcomp-dem.pdf", width = 8, height = 7)
# print() is called explicitly: a ggplot object is drawn only when printed, and
# top-level auto-printing does not happen when this file is run via source(),
# which would leave the PDF without a page.
print(
  ggplot(subcomp.dem,
         aes(x = prop.diff, y = reorder(term.eng, prop.diff), size = freq.tot)) +
    xlab("Probability Difference (CCP Members - Non-Members)") +
    ylab("Response") +
    xlim(c(-.03, .05)) +
    scale_shape(solid = TRUE) +
    theme_classic() +
    geom_point(alpha = .7, col = "grey75") +
    ggtitle("Cue Word: Democracy") +
    geom_vline(xintercept = 0, linetype = "dashed", size = .3) +
    scale_size_continuous(breaks = c(10, 20, 50, 100, 200, 500),
                          range = c(1, 10), name = "Total Frequency") +
    theme(plot.title = element_text(size = 11))
)
dev.off()
